from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, Response

import os
# Convert every PDF in `pdf_folder` into one UTF-8 text file in `output_folder`.
pdf_folder = "./pdfs"
output_folder = "./txts"
os.makedirs(output_folder, exist_ok=True)

# Load the PDF files (only files with a ".pdf" extension are picked up).
reader = SimpleDirectoryReader(input_dir=pdf_folder, required_exts=[".pdf"])
documents = reader.load_data()

# The default PDF loader yields one Document per page, and every page of a
# given PDF carries the same "file_name" metadata. Writing each Document
# straight to disk in "w" mode would therefore overwrite the earlier pages and
# keep only the last one, so collect the page texts per source file first.
# A plain dict keeps insertion order, so files and pages stay in load order.
pages_by_file = {}
for doc in documents:
    source_name = doc.metadata.get("file_name", "unknown")
    pages_by_file.setdefault(source_name, []).append(doc.text)

for source_name, pages in pages_by_file.items():
    # Swap only the trailing extension for ".txt"; str.replace would also
    # rewrite a ".pdf" occurring in the middle of the name and would miss
    # upper-case extensions such as ".PDF".
    stem, _ext = os.path.splitext(source_name)
    txt_path = os.path.join(output_folder, stem + ".txt")

    # Write all pages of the file in a single pass, one page after another.
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(pages))

    print(f"Saved: {txt_path}")